from lxml import html
from lxml import etree
import urllib2
import logging
from urllib2 import urlopen,Request
import time
import re
import json
from entites import Track
from mongoengine import *

# Open the mongoengine connection to the 'music' database at import time;
# the Track documents saved by the crawl below go through this connection.
connect('music')

# Single HTML parser instance, forced to UTF-8, reused by parseHtml() for
# every downloaded page.
myparser = etree.HTMLParser(encoding="utf-8")
# Headers attached to every Request built in parse(). Presumably a browser-like
# User-Agent is needed for the site to answer -- TODO confirm.
headers = { 'User-Agent' : 'Mozilla/5.0' }


    
def parse(url):
    """Download *url* and return it parsed as an lxml element tree.

    The request is sent with the module-level ``headers``. On any failure
    (network error, HTTP error, parse error) the call is retried forever
    with an exponentially growing delay: 1s, 2s, 4s, ...  The function
    therefore only returns on success; it never raises for ordinary errors.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT caught, so
    a stuck crawl can still be stopped with Ctrl-C.

    :param url: absolute URL to fetch.
    :returns: the root element produced by ``parseHtml``.
    """
    seconds = 1
    while True:
        try:
            req = Request(url, None, headers)
            response = urlopen(req)
            try:
                page = response.read()
            finally:
                # Release the socket even if read() fails part-way.
                response.close()
            return parseHtml(page)
        except Exception:
            # Log before sleeping so the message actually announces the wait
            # instead of appearing after it is over.
            logging.warning("failed to load external url "+url+" will try again in "+str(seconds))
            time.sleep(seconds)
            seconds *= 2
def parseHtml(page):
    """Parse raw HTML markup and return the root element of the tree.

    Uses the shared module-level ``myparser`` so every page is parsed with
    the same parser settings.

    :param page: the HTML document as returned by ``urlopen(...).read()``.
    :returns: whatever ``etree.HTML`` yields for *page*.
    """
    return etree.HTML(page, parser=myparser)

# Crawl driver: walks the site top-down
#   letter index -> artists of that letter -> albums of the artist -> songs
# and saves one Track document per song.
#
# Every level keeps its own page variable. Re-using a single `root` name made
# the credits lookup run against the Facebook stats response instead of the
# song page, so lyricist/composer/arranger were always saved empty.

# Resume marker: index links are skipped until the one equal to
# 'browse.asp?c=a' has been seen (edit that literal to resume part-way).
start = False
index_page = parse("http://www.nogomistars.com/browse.asp?c=a&co=Egypt&s=All")
letters = index_page.xpath("id('IndDivmainbox_left_ot')//@href")
for letter in letters:
    if letter == 'browse.asp?c=a':
        start = True
    if not start:
        continue
    logging.debug("letter :"+letter)
    letter_page = parse("http://www.nogomistars.com/"+letter+"&co=Egypt&s=All")
    artists = letter_page.xpath("id('IndDivmainbox_m_left_browse_thumb')/div/ul/li/div[3]//@href")
    for artist in artists:
        logging.debug("artist :"+artist)
        artist_page = parse("http://www.nogomistars.com/"+artist)
        albums = artist_page.xpath("id('IndDivmainbox_m_left_Top_Albums_thumb_1')/ul/li/div[2]//@href")
        years = artist_page.xpath("id('IndDivmainbox_m_left_Top_Albums_thumb_1')/ul/li/div[3]/font[2]/text()")
        # enumerate() pairs each album with the year at the same position;
        # list.index() would return the first match for duplicate hrefs.
        for i, album in enumerate(albums):
            logging.debug("album :"+album)
            # A missing or non-numeric year is stored as 0.
            year_text = years[i] if i < len(years) else ''
            year_match = re.search(r"\d\d\d\d", year_text)
            year = int(year_match.group(0)) if year_match else 0
            album_page = parse("http://www.nogomistars.com/"+album)
            album_name = album_page.xpath("id('IndDivmainbox_m_left_Top_Albums_top_Title')/font/text()")[0].strip()
            artist_name = album_page.xpath("id('IndDivmainbox_m_left_path_inner_middle')/div/font/a[4]/text()")[0].strip()
            songs = album_page.xpath("id('IndDivmainbox_m_left_Top_Albums_thumb_2_inner')/table/tr/td/table/tr/td[2]/a/@href")
            for song in songs:
                logging.debug("song :"+song)
                song_url = "http://www.nogomistars.com/"+song
                song_page = parse(song_url)
                song_name = song_page.xpath("id('IndDivmainbox_m_left_Wayah_top_Title')/font/text()")[0].strip()
                # The stream URL is the value of the first key=value pair in
                # the embed's flashvars attribute.
                play_url = song_page.xpath("//embed/@flashvars")[0].split('=')[1].split('&')[0]
                # NOTE(review): song_url is appended without URL-encoding, so
                # any '&' in it is seen by the API as a new parameter -- confirm
                # whether that matters for the counts returned.
                stats_page = parse("http://api.facebook.com/restserver.php?method=links.getStats&urls="+song_url)
                likes = int(stats_page.xpath("//total_count")[0].text)
                # Credits are best-effort: each field keeps '' unless its node
                # exists and has text. They are read from the SONG page.
                # Presumably the odd-indexed <font> nodes hold the values and
                # the even-indexed ones their labels -- confirm against markup.
                lyricist = ''
                composer = ''
                arranger = ''
                credit_nodes = song_page.xpath("//li/font")
                try:
                    lyricist = credit_nodes[1].text.strip()
                    composer = credit_nodes[3].text.strip()
                    arranger = credit_nodes[5].text.strip()
                except (IndexError, AttributeError):
                    # Node missing (IndexError) or without text (AttributeError
                    # on None): keep whatever was filled in so far.
                    pass
                # NOTE(review): the keyword is spelled 'lyricsist'; it has to
                # match the field declared on Track in entites, which is not
                # visible here, so it is left untouched.
                temp = Track(song_name=song_name, artist_name=artist_name, play_url=play_url,
                             likes=likes, lyricsist=lyricist, composer=composer, arranger=arranger,
                             album_name=album_name, year=year, url=song_url)
                temp.save()
        

